package au.com.acpfg.tpp; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.HashMap; import javax.xml.stream.XMLStreamReader; /* * Class defining the pepXML object */ public class pepXML { //private String searchEngine; // stores the name/type of search tool used private String srcFile; private String specId; private double mass; private int charge; private String peptide; private char prevAA; private char nextAA; private String modPeptide; private double iniProb; private double wt; // variable used in protXML files private double nsp; // variable used in protXML files private int ntt; // variable used in protXML files private int nspecs; // variable used in protXML files private PepXMLResultInterface globals; private HashMap<Integer, Integer> aaMods; // holds the AA modification // positions private String m_protein; // accession(s) private String m_protein_descr; // database description (if any) private int m_hit_rank; private int m_matched_ions, m_total_ions; // Variables for XTANDEM search results private double hyperscore; private double nextscore; private double xtandem_expect; // Variables for MASCOT search results private double mascot_ionscore; private double mascot_identityscore; private int mascot_star; private double mascot_homologyscore; private double mascot_expect; // Variables for SEQUEST search results private double sequest_xcorr; private double sequest_deltacn; private double sequest_deltacnstar; private double sequest_spscore; private double sequest_sprank; // peptide prophet assigned scores private double m_pp_ntt, m_pp_fval, m_pp_massd, m_pp_nmc; public pepXML() { globals = new PepXMLGlobals(); srcFile = null; } public pepXML(String txt) { this(); srcFile = txt; } public String getFilename() { return srcFile; } // public SET functions (need them for parsing protXML files) public void setPeptide(String txt) { this.peptide = txt; } public void setCharge(String txt) { this.charge = Integer.parseInt(txt); } public void setIniProb(String txt) { this.iniProb = Double.parseDouble(txt); } public void setNSP(String txt) { this.nsp = Double.parseDouble(txt); } public void setWt(String txt) { this.wt = Double.parseDouble(txt); } public void setMass(String txt) { this.mass = Double.parseDouble(txt); } public void setNTT(String txt) { this.ntt = Integer.parseInt(txt); } public void setNspecs(String txt) { this.nspecs = Integer.parseInt(txt); } // public GET functions public String getSpecId() { return specId; } public double getMass() { return mass; } public int getCharge() { return charge; } public String getPeptide() { return peptide; } public char getPrevAA() { return prevAA; } public char getNextAA() { return nextAA; } public String getProteinIds() { return m_protein; } public String getProteinDescr() { return m_protein_descr; } public int hitRank() { return m_hit_rank; } public int getMatchedIons() { return m_matched_ions; } public int getTotalIons() { return m_total_ions; } public String getModPeptide() { return modPeptide; } public double getHyperscore() { return hyperscore; } public double getNextscore() { return nextscore; } public double getXtandem_expect() { return xtandem_expect; } public double getIniProb() { return iniProb; } public double getWt() { return wt; } public double getNSP() { return nsp; } public int getNTT() { return ntt; } public int getNspecs() { return nspecs; } // MASCOT variables public double getMascot_ionscore() { return mascot_ionscore; } public double getMascot_identityscore() { return mascot_identityscore; } public int getMascot_star() { return mascot_star; } public double getMascot_homologyscore() { return mascot_homologyscore; } public double getMascot_expect() { return mascot_expect; } // SEQUEST variables public double getSequest_xcorr() { return sequest_xcorr; } public double getSequest_deltacn() { return sequest_deltacn; } public double getSequest_deltacnstar() { return sequest_deltacnstar; } public double getSequest_spscore() { return sequest_spscore; } public double getSequest_sprank() { return sequest_sprank; } public double getPP_fval() { return m_pp_fval; } public double getPP_ntt() { return m_pp_ntt; } public double getPP_nmc() { return m_pp_nmc; } public double getPP_massd() { return m_pp_massd; } /* * Function parses the given XML stream and records the relevant information * found in it. */ public void parse_pepXML_line(XMLStreamReader xmlStreamReader) { String attrName = null; String attrValue = null; for (int i = 0; i < xmlStreamReader.getAttributeCount(); i++) { attrName = xmlStreamReader.getAttributeLocalName(i); attrValue = xmlStreamReader.getAttributeValue(i); if (attrName.equals("spectrum")) this.specId = attrValue; else if (attrName.equals("assumed_charge")) this.charge = Integer.parseInt(attrValue); else if (attrName.equals("precursor_neutral_mass")) this.mass = Double.parseDouble(attrValue); else if (attrName.equals("peptide")) this.peptide = attrValue; else if (attrName.equals("peptide_prev_aa")) this.prevAA = attrValue.charAt(0); else if (attrName.equals("peptide_next_aa")) this.nextAA = attrValue.charAt(0); else if (attrName.equals("hit_rank")) { m_hit_rank = Integer.parseInt(attrValue); } else if (attrName.equals("protein_descr")) { m_protein_descr = attrValue; } else if (attrName.equals("protein")) { m_protein = attrValue; } else if (attrName.equals("num_matched_ions")) { m_matched_ions = Integer.parseInt(attrValue); } else if (attrName.equals("tot_num_ions")) { m_total_ions = Integer.parseInt(attrValue); } } } /* * <search_hit hit_rank="1" peptide="VGQTLLK" peptide_prev_aa="K" peptide_next_aa=" P" protein="CD909945_5" num_tot_proteins="1" num_matched_ions="12" tot_num_ions= "12" calc_neutral_pep_mass="1365.8805" massdiff="-0.1256" num_tol_term="2" num_m issed_cleavages="0" is_rejected="0" protein_descr="UniRef100_Q0TCD1 Cluster: Pro bable general secretion pathway protein I; n=4; Escherichia coli|Rep: Probable g eneral secretion pathway protein I - Escherichia coli O6:K15:H31 (strain 536 \ U PEC), partial (7%)"> <search_score name="ionscore" value="28.12"/> <search_score name="identityscore" value="51.98"/> <search_score name="star" value="1"/> <search_score name="homologyscore" value="40.47"/> <search_score name="expect" value="12.16"/> <analysis_result analysis="peptideprophet"> <peptideprophet_result probability="0.0695" all_ntt_prob="(0.0000,0.0006,0.0695) "> <search_score_summary> <parameter name="fval" value="-1.6228"/> <parameter name="ntt" value="2"/> <parameter name="nmc" value="0"/> <parameter name="massd" value="-0.126"/> </search_score_summary> </peptideprophet_result> </analysis_result> */ /** * Process peptideprophet-assigned score into member variables */ public void record_peptideprophet_scores(XMLStreamReader xmlStreamReader) { String attrName = null; String attrValue= null; String name = null; String val = null; for (int i=0; i<xmlStreamReader.getAttributeCount(); i++) { attrName = xmlStreamReader.getAttributeLocalName(i); attrValue= xmlStreamReader.getAttributeValue(i); if (attrName.equals("name")) { name = attrValue.trim().toLowerCase(); } else if (attrName.equals("value")) { val = attrValue; } } if (name != null && val != null) { if (name.equals("ntt")) { m_pp_ntt = Double.parseDouble(val); } else if (name.equals("nmc")) { m_pp_nmc = Double.parseDouble(val); } else if (name.equals("massd")) { m_pp_massd = Double.parseDouble(val); } else if (name.equals("fval")) { m_pp_fval = Double.parseDouble(val); } } } /* * Function parses amino acid modifications into aaMods variable */ public void record_AA_mod(XMLStreamReader xmlStreamReader) { String attrName = null; String attrValue = null; int k = -1; int v = 0; if (this.aaMods == null) this.aaMods = new HashMap<Integer, Integer>(); for (int i = 0; i < xmlStreamReader.getAttributeCount(); i++) { attrName = xmlStreamReader.getAttributeLocalName(i); attrValue = xmlStreamReader.getAttributeValue(i); if (attrName.equals("position")) k = Integer.parseInt(attrValue) - 1; if (attrName.equals("mass")) { v = (int) Math.round(Double.parseDouble(attrValue)); if (k > -1 && v > 0) this.aaMods.put(k, v); else { System.err.printf("\nERROR: mod_aminoacid_mass line pepXML::record_AA_mod()\n"); System.err.println(this.specId + "\n"); System.exit(-1); } } } } /* * Function parses search_score lines */ public void parse_search_score_line(XMLStreamReader xmlStreamReader) { String attrValue = null; for (int i = 0, j = 1; i < xmlStreamReader.getAttributeCount(); i++, j++) { attrValue = xmlStreamReader.getAttributeValue(i); /* * X!Tandem search scores */ if (attrValue.equals("hyperscore")) { globals.addSearchEngine("XTANDEM"); this.hyperscore = Double.parseDouble(xmlStreamReader .getAttributeValue(j)); } if (attrValue.equals("nextscore")) this.nextscore = Double.parseDouble(xmlStreamReader .getAttributeValue(j)); if (attrValue.equals("expect")) this.xtandem_expect = Double.parseDouble(xmlStreamReader .getAttributeValue(j)); /* * Mascot search scores */ if (attrValue.equals("ionscore")) { globals.addSearchEngine("MASCOT"); this.mascot_ionscore = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); } if (attrValue.equals("identityscore")) this.mascot_identityscore = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("star")) this.mascot_star = Integer.parseInt(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("homologyscore")) this.mascot_homologyscore = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("expect")) this.mascot_expect = Double.parseDouble(xmlStreamReader .getAttributeValue(j)); /* * Sequest search scores */ if (attrValue.equals("xcorr")) { globals.addSearchEngine("SEQUEST"); this.sequest_xcorr = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); } if (attrValue.equals("deltacn")) this.sequest_deltacn = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("deltacnstar")) this.sequest_deltacnstar = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("spscore")) this.sequest_spscore = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); if (attrValue.equals("sprank")) this.sequest_sprank = Double.parseDouble(xmlStreamReader. getAttributeValue(j)); } } /* * Function parses out peptide probability */ public void record_iniProb(XMLStreamReader xmlStreamReader) { String attrName = null; String attrValue = null; for (int i = 0; i < xmlStreamReader.getAttributeCount(); i++) { attrName = xmlStreamReader.getAttributeLocalName(i); attrValue = xmlStreamReader.getAttributeValue(i); if (attrName.equals("probability")) this.iniProb = Double.parseDouble(attrValue); } } /* * Function annotates modPeptide */ public void annotate_modPeptide() { if (this.aaMods == null) this.modPeptide = this.peptide; // peptide has no modifications else { modPeptide = ""; for (int i = 0; i < this.peptide.length(); i++) { this.modPeptide += this.peptide.charAt(i); if (this.aaMods.containsKey(i)) this.modPeptide += "[" + this.aaMods.get(i) + "]"; } } } public boolean hasProteinDescr() { return (m_protein_descr != null); } }